# coding=utf-8
import math
import numpy as np
import os, random
import os.path as osp
import scipy.stats as sstat
import freetype
from PIL import Image
import sys
from freetype import *
import cv2
import string
import re
from zhon.hanzi import punctuation 
import scipy.io as scio

# Extra line spacing expressed as a fraction of the font size; FontClass turns
# it into a per-instance offset with int(LINESOFF * size + 0.5).
LINESOFF = 0.1
# Additional line spacing added for regions whose 'attr' is 'title'.
TITLEOFF= 10
# Additional spacing added to a region's 'paraSpacing' between paragraphs.
PARASOFF = 10
# Offset for vertical layout; in the visible part of this file it is only
# referenced from a commented-out expression in random_para_v.
VPARASOFF = -20
# Region image types; FontClass.__init__ selects one by index through its
# `region_image_type` argument.
regionImageType = ['image_with_text', 'image_with_textGT', 'image_without_text']

class FontClass(object):
    """
    Fills a region of a document image with content (rendered text).
    """
    def __init__(self, config, region_i, surface, imageName, dateSaveDir, region_image_type=0, font_size=(25, 45, 30), indentation=1, font_name=None, thresh_color=(0, 80), isheading=False, random_bg=False, Trans = False, readir=0):
        """Prepare the font, size and text source used to fill one region.

        Args:
            config: Configuration object; ``config.regionList[region_i]`` is
                the region description dict used by this instance.
            region_i: Index of the region inside ``config.regionList``.
            surface: Image array the text is drawn onto.
            imageName: Stored as-is on the instance.
            dateSaveDir: Stored as-is on the instance.
            region_image_type: Index into the module-level ``regionImageType``
                list (type of inserted image, for different training purposes).
            font_size: ``(low, high, fixed)``. When ``fixed`` is ``None`` the
                base size is drawn uniformly from ``[low, high]``; otherwise
                ``fixed`` is the base size.
            indentation: Number of characters of first-line indentation
                (doubled for 'cn' and 'jp' regions).
            font_name: Font file name inside the font directory. A random
                ``ttf`` file is chosen instead when it is ``None``, when the
                region is not plain text, or when ``isheading`` is true.
            thresh_color: Pair whose first item is the minimum luminance
                difference between background and text colour (0 means the
                text stays black).
            isheading: Whether the region is rendered as a heading.
            random_bg: Stored as-is on the instance.
            Trans: When true, a fixed 25 degree rotation is applied to glyphs.
            readir: Stored as-is on the instance.

        Raises:
            ValueError: If the region language is not 'en', 'cn' or 'jp'.
            IndexError: If no ``txt`` content file or no ``ttf`` font file is
                found (raised by ``random.choice`` on an empty list).

        Regions whose 'attr' is 'formula' need no text, so for them no font,
        size or text source is set up.
        """
        self.config = config
        self.regionInfo = self.config.regionList[region_i]
        self.surface = surface
        self.imageName = imageName
        self.dateSaveDir = dateSaveDir
        self.punc = punctuation + string.punctuation                # punctuation filter
        self.region_image_type = regionImageType[region_image_type]
        self.indentation = indentation
        self.font_size = font_size
        self.isheading = isheading
        self.color_diff = thresh_color
        self.thresh_color = thresh_color[0]
        self.random_bg = random_bg
        self.readir = readir
        self.Trans = Trans

        attr = self.regionInfo['attr']
        if attr == 'formula':
            # Everything except formulas is filled with text.
            return

        lan = self.regionInfo['lan']
        # Japanese text reuses the Chinese font directories.
        font_dirs = {'en': 'en', 'cn': 'cn', 'jp': 'cn'}
        if lan not in font_dirs:
            raise ValueError("unsupported region language: {!r}".format(lan))
        fontdict = font_dirs[lan]
        if lan != 'en':
            self.indentation = indentation * 2

        contentPath = os.path.join('./content', lan)
        content_name = random.choice(self._collect_files(contentPath, 'txt'))

        fixed_size = font_size[2]
        mid_size = int((font_size[0] + font_size[1]) // 2)
        self.size = random.randint(font_size[0], font_size[1]) if fixed_size is None else fixed_size
        fontPath = os.path.join('./font', fontdict)
        if attr == 'title':
            fontPath = os.path.join('./font', fontdict + '_title')
            self.size = self.size * random.randint(2, 3)
        elif attr == 'text' and isheading:
            fontPath = os.path.join('./font', fontdict + '_bold')
            # Headings are at least as large as the base size.
            if fixed_size is None:
                self.size = random.randint(mid_size, font_size[1])
            else:
                self.size = random.randint(self.size, font_size[1])
        elif attr != 'text':
            # Non-text regions are at most as large as the base size.
            if fixed_size is None:
                self.size = random.randint(font_size[0], mid_size)
            else:
                self.size = random.randint(font_size[0], self.size)

        if font_name is None or attr != 'text' or isheading:
            font_name = random.choice(self._collect_files(fontPath, 'ttf'))

        self.face = freetype.Face(os.path.join(fontPath, font_name))
        print("FontPath {}, Properties scalable:{}, has_vertical:{}".format(fontdict + "/" + font_name, self.face.is_scalable, self.face.has_vertical))

        self.LINESOFF = int(LINESOFF * self.size + 0.5)
        self.face.set_char_size(self.size * 64)
        self.slot = self.face.glyph
        self.textSample = TextSource(1, os.path.join(contentPath, content_name))

        if self.Trans:  # TODO
            angle = 25  # np.random.randint(-25,25)
            angle = (angle / 180.0) * math.pi
            # 2x2 rotation matrix; entries are scaled by 0x10000 before
            # being handed to FreeType.
            matrix = FT_Matrix(int(math.cos(angle) * 0x10000),
                               int(-math.sin(angle) * 0x10000),
                               int(math.sin(angle) * 0x10000),
                               int(math.cos(angle) * 0x10000))
            pen = FT_Vector(0, 0)
            FT_Set_Transform(self.face._FT_Face, byref(matrix), byref(pen))

    @staticmethod
    def _collect_files(root, suffix):
        """Return the files below *root* whose name ends with *suffix*.

        Paths are relative to *root*, so a file found in a sub-directory can
        still be opened with ``os.path.join(root, path)``. Files directly in
        *root* are returned as bare file names.
        """
        found = []
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                if filename.endswith(suffix):
                    found.append(os.path.relpath(os.path.join(dirpath, filename), root))
        return found
        
        # self.draw_rec(2)
        
    def draw_rec(self, line_width):
        """Draw a black outline around the region on ``self.surface``.

        Debugging aid that marks the region's bounding box; meant to be
        switched off in real use.

        Args:
            line_width: Thickness of each edge in pixels.
        """
        region = self.regionInfo
        left, top = region['x'], region['y']
        right, bottom = left + region['w'], top + region['h']
        canvas = self.surface
        # Horizontal edges: top, then bottom.
        for edge_y in (top, bottom):
            canvas[edge_y:edge_y + line_width, left:right, :] = 0
        # Vertical edges: left, then right.
        for edge_x in (left, right):
            canvas[top:bottom, edge_x:edge_x + line_width, :] = 0

    def color_func(self, bitbuffer, rand=False, color='black'):
        """Turn a glyph coverage bitmap into a coloured image on white.

        Args:
            bitbuffer: 2-D array; every entry greater than 0 is treated as ink.
            rand: When true, a random value of ``self.config.color_RGB`` is
                used instead of ``color``.
            color: Key into ``self.config.color_RGB`` (looked up even when
                ``rand`` is true).

        Returns:
            ``(h, w, 3)`` ``ubyte`` array: 255 everywhere except ink pixels,
            which carry the first three components of the chosen colour.
        """
        rows, cols = bitbuffer.shape
        ink = bitbuffer > 0
        painted = np.full((rows, cols, 3), 255, dtype=np.ubyte)

        palette = self.config.color_RGB
        rgb = palette[color]
        if rand:
            rgb = random.choice(list(palette.values()))

        for channel in range(3):
            painted[:, :, channel][ink] = rgb[channel]
        return painted
    
    def random_sum(self, num, summation):
        """Split ``summation`` lines into ``num`` paragraph line counts.

        Args:
            num: Number of paragraphs.
            summation: Total number of lines.

        Returns:
            ``[summation]`` when there are fewer lines than paragraphs;
            otherwise a list of line counts that adds up to
            ``int(summation)`` for integral input. Every paragraph but the
            last gets a random count of at least 3 lines as long as the
            budget left after reserving 3 lines for each later paragraph
            allows it; otherwise it gets that (possibly negative) budget.
        """
        min_lines = 3
        if summation < num:
            return [summation]

        parts = []
        allotted = 0
        # `later_paras` is how many paragraphs still follow the current one.
        for later_paras in range(num - 1, 0, -1):
            budget = int(summation - later_paras * min_lines - allotted)
            if budget < min_lines:
                share = budget
            else:
                share = random.randint(min_lines, budget)
            parts.append(share)
            allotted += share
        # The last paragraph takes whatever is left.
        parts.append(int(summation - allotted))
        return parts

    def random_para(self):
        """Randomly split the region height into paragraphs of whole lines.

        Paragraphs of at least 3 lines are drawn while the remaining height
        can hold one. Left-over height is merged into the last paragraph as
        extra lines. If the region cannot hold even one 3-line paragraph, a
        single shorter paragraph is returned; for 'text' regions the font is
        first shrunk to ``self.font_size[0]`` (updating ``self.size``,
        ``self.LINESOFF`` and the face's char size) so more lines fit.

        Returns:
            ``(total_lines, lines_per_paragraph)``.
        """
        region = self.regionInfo
        line_spacing = region['lineSpacing'] + self.LINESOFF
        if region['attr'] == 'title':
            line_spacing = line_spacing + TITLEOFF
        para_spacing = region['paraSpacing'] + PARASOFF

        min_line_perpara = 3
        remain_h = region['h'] - region['blank']
        min_para_h = (self.size + line_spacing) * (min_line_perpara - 1) + self.size
        para_line_num = []
        while remain_h - min_para_h >= 0:  # the rest can hold a paragraph of its own
            max_line = int((remain_h - self.size) / (self.size + line_spacing) + 1)
            line_num = random.randint(min_line_perpara, max_line)
            para_line_num.append(line_num)
            remain_h = remain_h - ((line_num - 1) * (self.size + line_spacing) + self.size + para_spacing)

        if remain_h > 0:  # space is left, but not enough for another paragraph
            if not para_line_num:
                # The whole region is too small for a minimal paragraph
                # (e.g. a title): allocate what fits directly.
                if region['attr'] == 'text':
                    # Shrink the font so as many lines as possible fit.
                    self.size = self.font_size[0]
                    self.LINESOFF = int(LINESOFF * self.size + 0.5)
                    self.face.set_char_size(self.size * 64)
                    # The spacing depends on LINESOFF, so it must follow the
                    # new size; otherwise the count uses a stale line pitch.
                    line_spacing = region['lineSpacing'] + self.LINESOFF
                para_line_num.append(int((remain_h - self.size) / (self.size + line_spacing)) + 1)
            else:
                # Merge the lines that still fit into the last paragraph; the
                # paragraph gap reserved after it is available again.
                remain_linenum = int((remain_h + para_spacing) / (self.size + line_spacing))
                para_line_num[-1] = para_line_num[-1] + remain_linenum

        return sum(para_line_num), para_line_num

    def random_para_v(self):
        """Randomly split the region width into paragraphs of whole lines.

        Counterpart of the height-based split for vertical text: lines are
        stacked along the region width. Paragraphs of at least 3 lines are
        drawn while the remaining width can hold one. Left-over width is
        merged into the last paragraph as extra lines. If the region cannot
        hold even one 3-line paragraph, a single shorter paragraph is
        returned; for 'text' regions the font is first shrunk to
        ``self.font_size[0]`` (updating ``self.size``, ``self.LINESOFF`` and
        the face's char size) so more lines fit.

        Returns:
            ``(total_lines, lines_per_paragraph)``.
        """
        region = self.regionInfo
        line_spacing = region['lineSpacing'] + self.LINESOFF
        if region['attr'] == 'title':
            line_spacing = line_spacing + TITLEOFF
        para_spacing = region['paraSpacing'] + PARASOFF

        min_line_perpara = 3
        remain_w = region['w'] - region['blank']
        min_para_w = (self.size + line_spacing) * (min_line_perpara - 1) + self.size
        para_line_num = []
        while remain_w - min_para_w >= 0:  # the rest can hold a paragraph of its own
            max_line = int((remain_w - self.size) / (self.size + line_spacing) + 1)
            line_num = random.randint(min_line_perpara, max_line)
            para_line_num.append(line_num)
            remain_w = remain_w - ((line_num - 1) * (self.size + line_spacing) + self.size + para_spacing)

        if remain_w > 0:  # space is left, but not enough for another paragraph
            if not para_line_num:
                # The whole region is too small for a minimal paragraph
                # (e.g. a title): allocate what fits directly.
                if region['attr'] == 'text':
                    # Shrink the font so as many lines as possible fit.
                    self.size = self.font_size[0]
                    self.LINESOFF = int(LINESOFF * self.size + 0.5)
                    self.face.set_char_size(self.size * 64)
                    # The spacing depends on LINESOFF, so it must follow the
                    # new size; otherwise the count uses a stale line pitch.
                    line_spacing = region['lineSpacing'] + self.LINESOFF
                para_line_num.append(int((remain_w - self.size) / (self.size + line_spacing)) + 1)
            else:
                # Merge the lines that still fit into the last paragraph; the
                # paragraph gap reserved after it is available again.
                remain_linenum = int((remain_w + para_spacing) / (self.size + line_spacing))
                para_line_num[-1] = para_line_num[-1] + remain_linenum

        return sum(para_line_num), para_line_num

    # def render_heading(self, word):
    #     fontPath = os.path.join('./font', fontdict+'_title')  
    #     for _, _, filenames in os.walk(fontPath):
    #         for filename in filenames:
    #             if filename.endswith('ttf'):
    #                 fontSet.append(filename)
    #     font_name = random.choice(fontSet)

    #     headingface = freetype.Face(os.path.join(fontPath,font_name))
    #     headingface.set_char_size(self.size * 64)
    #     slot = headingface.glyph

    #     l = int((self.regionInfo['w'] - self.regionInfo['blank'])/self.size)
    #     num_char = max(15, l)
    #     for i in range(num_char):
    #         if i >= len(word):
    #             break
    #         c = word[i]
    #         headingface.load_char(c)
    #         bitmap = slot.bitmap
    #         w,h = bitmap.width, bitmap.rows
    #         kerning = headingface.get_kerning(previous, c)
    #         width += (slot.advance.x >> 6) + (kerning.x >> 6)
            
    #         if width > W + LX - xStart - self.regionInfo['blank']:


    def render_para(self, xStart, yLine, lineNum, LX, W, istitleofchart=False, isFisrtPara=False):
        """Render one horizontal paragraph onto ``self.surface``.

        Text is sampled from ``self.textSample``, optionally decorated
        (capitalisation, caption or heading numbering), coloured so that it
        contrasts with the background, and drawn glyph by glyph with
        FreeType, wrapping whenever the accumulated advance exceeds the
        usable width.

        Args:
            xStart: x coordinate at which every line starts.
            yLine: y coordinate of the top of the first line.
            lineNum: Number of lines of the paragraph.
            LX: x coordinate of the left edge of the region cell.
            W: Total width of the region cell.
            istitleofchart: When true, 'en'/'cn' table and image regions may
                get a "Table n." / "Figure n:" style prefix.
            isFisrtPara: When true and the language is 'en', the first-line
                indentation is applied only with probability 0.7.

        Returns:
            ``(bottom_y, para_content, line_rects, char_rects)`` where

            * ``bottom_y`` is the top of the last drawn glyph plus the
              tallest glyph bitmap height seen;
            * ``para_content`` is a list of lines, each a list of word
              records ``[x0, y0, x1, y0, x1, y1, x0, y1, text]``;
            * ``line_rects`` holds one 8-value quad per line;
            * ``char_rects`` is nested as line -> word -> 8-value glyph quad.
        """
        region = self.regionInfo

        # ---- sample enough characters to fill the paragraph ----
        min_charNum = int(lineNum * (W - region['blank']) / self.size)
        word = self.textSample.get_para(lineNum + 10, min_charNum + 10, lan=region['lan']).strip()
        # Drop leading punctuation. lstrip() also copes with empty text,
        # which the former index-based loop could not (unbound loop index).
        word = word.lstrip(self.punc)
        if len(word) < min_charNum:
            print("sample char less than expection, {} {}".format(len(word), min_charNum))
        if region['lan'] == 'en':
            word = word.capitalize()
            if region['attr'] == 'title':
                rv = random.random()
                if rv < 0.3:
                    word = word.upper()
                else:
                    word = word.title()

        if region['lan'] == 'en' and istitleofchart:
            if region['attr'] == 'table' or region['attr'] == 'tableimg':
                word = 'Table %d. '%(random.randint(1,9)) + word
            elif (region['attr'] == 'imagegraph' or region['attr'] == 'image'):
                rv = random.random()
                if rv <= 0.3:
                    word = 'Figure %d: '%(random.randint(1,9)) + word
        elif region['lan'] == 'cn' and istitleofchart:
            if region['attr'] == 'table' or region['attr'] == 'tableimg':
                word = '表%d. '%(random.randint(1,9)) + word
            elif (region['attr'] == 'imagegraph' or region['attr'] == 'image'):
                rv = random.random()
                if rv <= 0.3:
                    word = '图%d: '%(random.randint(1,9)) + word
        elif self.isheading:
            rv = random.random()
            if rv < 0.2:
                pass
            elif rv < 0.6:
                word = '%d. '%(random.randint(1,9)) + word
            else:
                word = '%d.%d. '%(random.randint(1,9), random.randint(1,9)) + word
            word = word.title()

        # ---- choose the paragraph colour ----
        # A 20x20 window around the paragraph centre stands in for the
        # background; negative starts are clamped so they do not wrap around.
        crop_y_cen = (yLine + yLine+lineNum*self.size)//2
        crop_x_cen = (xStart + xStart+W)//2
        crop_image = self.surface[max(crop_y_cen-10, 0):crop_y_cen+10,
                                  max(crop_x_cen-10, 0):crop_x_cen+10, :].copy()
        print("render text_bg crop_image shape:{}".format(crop_image.shape))
        rand_color = self._pick_text_color(crop_image)

        # ---- text filling ----
        x = xStart
        y = yLine
        width = 0

        # First-line indentation.
        if lineNum != 1 and region['attr'] == 'text' and not self.isheading:
            if isFisrtPara and region['lan'] == 'en':
                rv = random.random()
                if rv > 0.3:
                    x += self.size * self.indentation
                    width += self.size * self.indentation
            else:
                x += self.size * self.indentation
                width += self.size * self.indentation

        # Text baseline, as a fraction of the font size below the line top.
        if region['attr'] == 'title':
            baseline = int(self.size / 5 * 4)
        else:
            baseline = int(self.size / 4 * 3)

        lineFlag = 0            # index of the line currently being filled
        previous = 0            # previous character, for kerning
        newline = True          # no visible glyph drawn on this line yet
        para_content = []
        char_rects = []
        line_rects = []
        line_content = []
        line_text = ''
        # Running box of the current word: [x0, y0, x1, y1, text].
        word_content = [x, yLine+baseline, x, yLine, '']
        linecharcount = 0
        firstlinecharcount = 10
        breakflag = False
        last_h = 0
        for c in word:

            # Character missing from the font.
            if self.face.get_char_index(c) == 0:
                print("---------------------char %c not exist in font!!!------------------------"%(c))
                continue

            # Count the characters of the first line.
            if lineFlag == 0:
                firstlinecharcount = firstlinecharcount + 1

            # Random early stop on the last line, at a word boundary.
            if c == ' ' and breakflag:
                break

            try:
                self.face.load_char(c)
            except Exception as e:
                print("**************** load char error ******************* ", c)
                print(e)
                # Skip the glyph instead of pausing on input(): the slot
                # would still hold the previous glyph, and a blocking prompt
                # stalls unattended runs.
                continue
            bitmap = self.slot.bitmap
            w, h = bitmap.width, bitmap.rows
            kerning = self.face.get_kerning(previous, c)
            kern_x = kerning.x >> 6
            advance_x = self.slot.advance.x >> 6
            width += advance_x + kern_x

            # Decide whether the last line should end early.
            if lineNum-1 == lineFlag and region['attr'] == 'text':
                linecharcount = linecharcount + 1
                min_num = self.indentation
                max_num = firstlinecharcount
                if lineFlag == 0:
                    min_num = int(min_charNum / 2)
                    max_num = int(min_charNum * 2)
                if linecharcount > random.randint(min_num, max_num):
                    breakflag = True

            # Line full: close it and start the next one.
            if width > W + LX - xStart - region['blank']:
                width = advance_x + kern_x
                lineFlag += 1
                x = xStart
                yLine += region['lineSpacing'] + self.LINESOFF + self.size
                if region['attr'] == 'title':
                    yLine += TITLEOFF
                if word_content[4].replace(' ', '') != '':
                    line_content.append(self._word_record(word_content))

                if line_content:
                    line_rects.append(self._line_quad(line_content))
                    para_content.append(line_content[:])

                    # Sanity check: recorded words must match the drawn text.
                    line_word = ''.join(record[-1] for record in line_content)
                    if line_word.strip() != line_text.strip():
                        print('line text !!!!!!')
                        print('line({}): {}, word({}): {}'.format(len(line_text), line_text,  len(line_word), line_word))
                    line_text = ''

                line_content = []
                word_content[0] = xStart
                word_content[1] = yLine + baseline
                word_content[3] = yLine
                word_content[4] = ''
                newline = True

            # All requested lines are filled.
            if lineFlag >= lineNum:
                break

            # Skip a space that would start a line (except on the first line).
            if c == ' ' and newline and lineFlag != 0:
                continue

            top = self.slot.bitmap_top
            left = self.slot.bitmap_left
            y = baseline - top + yLine
            x += kern_x
            char_map_x = x + left

            bitbuffer = np.array(bitmap.buffer, dtype='ubyte').reshape(h, w)
            bit_color = self.color_func(bitbuffer, color=rand_color)
            self._blit_glyph(bit_color, y, char_map_x)
            line_text += c
            if h > last_h:
                last_h = h

            glyph_quad = [char_map_x, y, char_map_x+w, y, char_map_x+w, y+h, char_map_x, y+h]
            if c != ' ':
                if newline:
                    newline = False
                    # Drop the empty word left open at the end of the
                    # previous line before starting this line's first word.
                    if len(char_rects) > 0 and len(char_rects[-1][-1]) == 0:
                        char_rects[-1] = char_rects[-1][:-1]
                    char_rects.append([[glyph_quad]])
                else:
                    if len(char_rects) > 0:
                        char_rects[-1][-1].append(glyph_quad)
                    # Punctuation closes the current word.
                    if c in self.punc:
                        if len(char_rects) > 0 and len(char_rects[-1][-1]) != 0:
                            char_rects[-1].append([])
            elif not newline:
                # A space closes the current word.
                if len(char_rects) > 0 and len(char_rects[-1][-1]) != 0:
                    char_rects[-1].append([])

            x += advance_x
            previous = c

            # End of word: a space, or punctuation in non-English text.
            if c == ' ' or (c in self.punc and region['lan'] != 'en'):
                if word_content[4].replace(' ', '') != '':
                    line_content.append(self._word_record(word_content))
                word_content[0] = x
                word_content[1] = yLine + baseline
                word_content[2] = x
                word_content[3] = yLine
                word_content[4] = ' '
                continue

            # Grow the running word box to include this glyph.
            word_content[2] = char_map_x + w
            if word_content[1] > y:
                word_content[1] = y
            if word_content[3] < y + h:
                word_content[3] = y + h
            word_content[4] += c

        # Flush the last line when it was not closed by a wrap.
        if word_content[4].replace(' ', '') != '':
            line_content.append(self._word_record(word_content))
        if line_content:
            para_content.append(line_content[:])
            line_rects.append(self._line_quad(line_content))
        return y+last_h, para_content, line_rects, char_rects

    def _pick_text_color(self, background):
        """Return the name of a palette colour that contrasts with *background*.

        A colour qualifies when its luminance (0.299/0.587/0.114 weighting of
        its three components) differs from the mean luminance of *background*
        by at least ``self.thresh_color``. One qualifying colour is chosen at
        random; if none qualifies the colour with the largest difference is
        used, so the search always terminates. 'black' is returned when the
        threshold is not positive or *background* is empty.
        """
        if self.thresh_color <= 0 or background.size == 0:
            return 'black'
        # Index the channel axis explicitly: background[:][:][k] would pick
        # row k of the crop, not channel k.
        bg_luma = float(np.mean(0.299*background[:, :, 0]
                                + 0.587*background[:, :, 1]
                                + 0.114*background[:, :, 2]))
        contrast = {}
        for name, rgb in self.config.color_RGB.items():
            luma = 0.299*rgb[0] + 0.587*rgb[1] + 0.114*rgb[2]
            contrast[name] = math.fabs(luma - bg_luma)
        candidates = [name for name, diff in contrast.items() if diff >= self.thresh_color]
        if candidates:
            return random.choice(candidates)
        return max(contrast, key=contrast.get)

    def _blit_glyph(self, glyph_rgb, dst_y, dst_x):
        """Copy the non-white pixels of *glyph_rgb* onto ``self.surface``.

        The glyph's top-left corner goes to ``(dst_y, dst_x)``. The copy is
        clipped to the configured picture size and to the surface itself, so
        out-of-range pixels are dropped instead of wrapping around or raising.
        """
        rows, cols = glyph_rgb.shape[:2]
        max_y = min(self.config.picHeight, self.surface.shape[0])
        max_x = min(self.config.picWidth, self.surface.shape[1])
        y0, x0 = max(dst_y, 0), max(dst_x, 0)
        y1, x1 = min(dst_y + rows, max_y), min(dst_x + cols, max_x)
        if y0 >= y1 or x0 >= x1:
            return
        patch = glyph_rgb[y0 - dst_y:y1 - dst_y, x0 - dst_x:x1 - dst_x]
        ink = np.any(patch != 255, axis=2)
        # Basic slicing yields a view, so this writes into the surface.
        target = self.surface[y0:y1, x0:x1, 0:3]
        target[ink] = patch[ink]

    @staticmethod
    def _word_record(box):
        """Expand a running word box ``[x0, y0, x1, y1, text]`` into a quad record."""
        x0, y0, x1, y1, text = box
        return [x0, y0, x1, y0, x1, y1, x0, y1, text]

    @staticmethod
    def _line_quad(line_content):
        """Return the 8-value quad enclosing all word records of one line."""
        coords = np.array([record[:-1] for record in line_content])
        top = np.min(coords, axis=0)[1]
        bottom = np.max(coords, axis=0)[5]
        left = line_content[0][0]
        right = line_content[-1][2]
        return [left, top, right, top, right, bottom, left, bottom]

    def render_para_v(self, xLine, yStart, lineNum, LY, H, readir=0):
        '''
        垂直方向, 从左向右画
        渲染一段话，需要输入起始坐标和段行数，返回段尾坐标
        xLine : 起始x坐标
        yStart : 起始y坐标
        lineNum : para的line数目
        LY : 格子的最左上坐标
        H : 格子的总高
        '''

        #################################
        # 保证采样字符数足够填充段落空间 #
        #################################
        min_charNum = int(lineNum * (H - self.regionInfo['blank'])/self.size)
        word = self.textSample.get_para(lineNum + 10, min_charNum + 10, lan=self.regionInfo['lan']).strip()
        # sample_times = 0    
        # word = self.textSample.sample_para(300,600)
        # word = word.strip()
        # min_charNum = int(lineNum * (H - self.regionInfo['blank'])/self.size)
        # max_charNum = min_charNum
        # if min_charNum < 300:
        #     max_charNum = 600
        # else:
        #     max_charNum = 2 * min_charNum
        # # print("\033[31msample chars start!!!\033[0m")
        # while (len(word) < min_charNum and sample_times < 5) or len(word) == 0:
        #     sample_times += 1
        #     t_word = self.textSample.sample_para(300,max_charNum)
        #     word = word + ' ' + t_word 
        #     word = word.strip()
        # 去除首字母的标点
        for i,c in enumerate(word):
            if c not in self.punc:
                break
        word = word[i:]
        if len(word) < min_charNum:
            print("sample char less than expection, {} {}".format(len(word), min_charNum))
        
        # print("\033[31msample chars end(%d)!!!\033[0m"%(sample_times+1))
        # print("Sampled chars(%d): %s"%(len(word), word[:10]))
        # if len(word) > 100:
        #     print("Sampled chars(%d): %s"%(len(word), word[-100:]))

        ####################
        # 段落字符颜色选取 #
        ####################
        diff = 0          #亮度差值
        rand_color = 'black'
        while diff < self.thresh_color:
            rand_color = random.choice(list(self.config.color_RGB.keys())) 
            color_v = self.config.color_RGB[rand_color]
            color_l = 0.299*color_v[0] + 0.587*color_v[1] + 0.114*color_v[2]
            if readir:
                crop_image = self.surface[yStart:yStart+H, xLine-lineNum*self.size:xLine, :].copy()
            else:
                crop_image = self.surface[yStart:yStart+H, xLine:xLine+lineNum*self.size, :].copy()
            c_l = np.mean(0.299*crop_image[:][:][0] + 0.587*crop_image[:][:][1] + 0.114*crop_image[:][:][2])
            diff = math.fabs(color_l-c_l)
        print("color:{} diff:{}".format(rand_color, diff))

        ############
        # 文本填充 #
        ############
        if readir:
            xLine = xLine - int(self.size//2)
        x = xLine
        y = yStart
        width, height, baseline = 0, 0, 0

        # 首行缩进
        if(lineNum!=1 and self.regionInfo['attr'] == 'text'):
            word = ' ' * self.indentation + word 
            # y += self.size * self.indentation
            # height += self.size * self.indentation
        # 文本基线
        if self.regionInfo['attr'] == 'title':
            baseline = 0 #int(self.size / 4)
        else:
            baseline = int(self.size / 2)    

        lineFlag = 0
        previous = 0
        newline = True
        para_content = []
        char_rects = []
        line_rects = []
        line_content = []
        line_text = ''
        word_content = [xLine,yStart,xLine,yStart,'']
        linecharcount = 0
        firstlinecharcount = 10
        breakflag = False

        last_w = 0
        for i, c in enumerate(word):
            if(lineFlag==0):
                firstlinecharcount = firstlinecharcount+1;
            if(c==' ' and breakflag==True):   
                break

            if self.face.get_char_index(c) == 0: # 不存在该字符
                print("---------------------char %c(%c) not exist!!!------------------------"%(c, previous))
                continue
            self.face.load_char(c, FT_LOAD_RENDER | FT_LOAD_VERTICAL_LAYOUT)
            bitmap = self.slot.bitmap
            w,h = bitmap.width, bitmap.rows
            # kerning = self.face.get_kerning(previous, c)
            height += (self.slot.advance.y >> 6) #+ (kerning.x >> 6)
            # print("debugh: ", height, self.slot.advance.x, self.slot.advance.y, kerning.x, kerning.y)
            
            # 最后一行终止标志
            if lineNum-1 == lineFlag and self.regionInfo['attr'] == 'text':
                linecharcount = linecharcount+1
                min_num = self.indentation
                max_num = firstlinecharcount
                if lineFlag == 0:
                    min_num = int(min_charNum / 2)
                    max_num = int(min_charNum * 2)
                if(linecharcount>random.randint(min_num, max_num)):
                    breakflag = True

            if height > H + LY - yStart - self.regionInfo['blank']:
                height = (self.slot.advance.y >> 6) #+ (kerning.x >> 6)
                lineFlag += 1
                # if(lineFlag==1):
                #     print("firstlinecharcount: ", firstlinecharcount)
                y = yStart
                if readir:
                    xLine = xLine - (self.regionInfo['lineSpacing']+self.LINESOFF + self.size)
                    if self.regionInfo['attr'] == 'title':
                        xLine -= TITLEOFF
                else:
                    xLine = xLine + self.regionInfo['lineSpacing']+self.LINESOFF + self.size
                    if self.regionInfo['attr'] == 'title':
                        xLine += TITLEOFF
                if word_content[4].replace(' ','') != '':
                    _word_content = [word_content[2], word_content[1], \
                                     word_content[2], word_content[3], \
                                     word_content[0], word_content[3], \
                                     word_content[0], word_content[1], word_content[4]]
                    line_content.append(_word_content[:])

                #line rect
                line_rect = []
                line_word = ''
                for rect in line_content:
                    if len(rect[:-1]) > 0:
                        line_rect.append(rect[:-1])
                        line_word = line_word + rect[-1]
                if len(line_rect):
                    line_rect = np.array(line_rect)
                    rect = [np.max(line_rect, axis=0)[0], line_content[0][1], \
                            np.max(line_rect, axis=0)[0], line_content[-1][5], \
                            np.min(line_rect, axis=0)[4], line_content[-1][5], \
                            np.min(line_rect, axis=0)[4], line_content[0][1]]
                    line_rects.append(rect)
                    para_content.append(line_content[:])

                    if line_word.strip() != line_text.strip():
                        print('line text !!!!!!')
                        print('line({}): {}, word({}): {}'.format(len(line_text), line_text,  len(line_word), line_word))
                    line_text = ''

                line_content = []
                word_content[0] = xLine
                word_content[1] = yStart
                word_content[2] = xLine
                word_content[4] = ''
                newline = True
            
            if lineFlag >= lineNum:
                break

            #非首行第一个字母为空则跳过
            if c == ' ' and newline and lineFlag!=0:
                continue

            left = self.slot.bitmap_left
            if readir:
                x = left + xLine - baseline
            else:
                x = baseline + left + xLine
            # y += (kerning.y >> 6)
            # print("size {} advance.x {} advance.y {} bitmap_h {} bitmap_top {} c {}".format(self.size, self.slot.advance.x>>6, self.slot.advance.y>>6, h, self.slot.bitmap_top, c))
            # print("left ", left)
            bitbuffer = np.array(bitmap.buffer, dtype='ubyte').reshape(h,w)
            bit_color = self.color_func(bitbuffer, color=rand_color)
            for ii in range(h):
                for jj in range(w):
                    if bit_color[ii][jj][0] != 255 or bit_color[ii][jj][1] != 255 or bit_color[ii][jj][2] != 255:
                        if(ii+y< self.config.picHeight  and jj+x<self.config.picWidth ):
                            self.surface[ii+y][jj+x][0:3] = bit_color[ii][jj] 
            line_text += c
            # self.surface[y:y+h,x:x+w] = bit_color

            if w > last_w:
                last_w = w
            if c != ' ':
                if newline:
                    newline = False
                    if len(char_rects) > 0 and len(char_rects[-1][-1]) == 0:
                        char_rects[-1] = char_rects[-1][:-1]
                    char_rects.append([[[x+w, y, x+w, y+h, x, y+h, x,y]]])
                else:
                    # if c in self.punc:
                    #     if len(char_rects) > 0 and len(char_rects[-1][-1]) != 0:
                    #         char_rects[-1].append([]) 
                    if len(char_rects) > 0:
                        char_rects[-1][-1].append([x+w, y, x+w, y+h, x, y+h, x, y])
                    if c in self.punc:
                        if len(char_rects) > 0 and len(char_rects[-1][-1]) != 0:
                            char_rects[-1].append([]) 

            elif c == ' ' and not newline:
                if len(char_rects) > 0 and len(char_rects[-1][-1]) != 0:
                    char_rects[-1].append([])

            y += (self.slot.advance.y >> 6)
            previous = c

            # if c == ' ' and word_content[3] == 0:
            #     word_content[1] = y
            #     continue
            if c == ' ' or (c in self.punc and self.regionInfo['lan'] != 'en'):
                # word_content[4] = re.sub(r"[%s]+" %(self.punc), "", word_content[4])
                # if c in self.punc:
                    # word_content[3] = y
                    # if word_content[0] == xLine:
                    #     word_content[0] = x
                    # elif word_content[0] > x:
                    #     word_content[0] = x
                    # if word_content[2] < x+w:
                    #     word_content[2] = x+w
                    # word_content[4] += c
                if word_content[4].replace(' ','') != '':
                    _word_content = [word_content[2], word_content[1], \
                                     word_content[2], word_content[3], \
                                     word_content[0], word_content[3], \
                                    word_content[0], word_content[1], word_content[4]]
                    line_content.append(_word_content[:])
                word_content[1] = y
                word_content[3] = y
                word_content[2] = xLine
                # if c in self.punc:
                #     word_content[4] = ''
                # else:
                word_content[4] = ' '
                continue
            word_content[3] = y
            if word_content[0] == xLine:
                word_content[0] = x
            elif word_content[0] > x:
                word_content[0] = x
            if word_content[2] < x+w:
                word_content[2] = x+w
            word_content[4] += c
        
        # 处理最后一行字符（如果最后一行未填充满整行，则由本部分代码进行信息处理）
        # word_content[4] = re.sub(r"[%s]+" %(self.punc), "", word_content[4])
        if word_content[4].replace(' ','') != '':
            _word_content = [word_content[2], word_content[1], \
                                word_content[2], word_content[3], \
                                word_content[0], word_content[3], \
                                word_content[0], word_content[1], word_content[4]]
            line_content.append(_word_content[:])
            # para_content.append(line_content[:])
        line_rect = []
        for rect in line_content:
            if len(rect[:-1]) > 0:
                line_rect.append(rect[:-1])
        if len(line_rect) > 0:
            para_content.append(line_content[:])
            line_rect = np.array(line_rect)
            rect = [np.max(line_rect, axis=0)[0], line_content[0][1], \
                    np.max(line_rect, axis=0)[0], line_content[-1][5], \
                    np.min(line_rect, axis=0)[4], line_content[-1][5], \
                    np.min(line_rect, axis=0)[4], line_content[0][1]]
            line_rects.append(rect) 
            line_text = ''
        # print("{}, {}, {}".format(len(para_content), len(line_rects), len(char_rects))) 
        if readir == 0:
            x += last_w                   
        return x, para_content, line_rects, char_rects

    def table_draw(self):
        '''
        Draw a table grid with a random number of rows and columns, plus a caption.

        Part of the region height is first reserved for a 1-3 line caption and
        self.regionInfo['h'] is reduced accordingly (this happens even when the
        table is later rejected as too small). The caption is rendered with
        self.render_para either above the grid (rv > 0.2, in which case
        self.regionInfo['y'] is shifted down by the caption height) or below
        it (rv <= 0.2). All cells currently share the same size.

        Returns a 5-tuple:
            coordSet  - row-major matrix of [x, y] text start points, one per cell
            lineNum   - row-major matrix with the number of text lines per cell
            colX      - left x coordinate of every column
            colWidth  - width of every column
            title     - [para_content, line_rects, char_rects] of the caption
        When the region cannot hold at least a 2x2 grid, five None values are
        returned instead.
        '''
        region = self.regionInfo

        # Reserve room for the caption; fall back to a single caption line
        # when it would take half of the region or more.
        num_lines = random.randint(1, 3)
        title_h = (self.size + self.LINESOFF) * (num_lines - 1) + self.size * 2
        remain_h = region['h'] - title_h
        if remain_h <= int(region['h'] // 2) and num_lines != 1:
            num_lines = 1
            title_h = (self.size + self.LINESOFF) * (num_lines - 1) + self.size * 2
            remain_h = region['h'] - title_h
        region['h'] = remain_h

        def _max_cells():
            # Smallest usable cell is one glyph plus blank padding on both sides.
            cell = self.size + 2 * region['blank']
            return int(region['h'] / cell), int(region['w'] / cell)

        maxRow, maxCol = _max_cells()
        if maxRow <= 2 or maxCol <= 2:
            # Too few cells: retry with the smallest configured font size.
            self.size = self.font_size[0]
            self.LINESOFF = int(LINESOFF * self.size + 0.5)
            self.face.set_char_size(self.size * 64)
            maxRow, maxCol = _max_cells()
        if maxRow < 2 or maxCol < 2:
            # Well-formed ANSI sequences: ESC[31m switches to red, ESC[0m resets.
            print("\033[31mCan't draw table in this small region\033[0m")
            print("\033[31m h/line {}/{} w/line {}/{}  \033[0m".format(
                region['h'], maxRow, region['w'], maxCol))
            return None, None, None, None, None

        # Caption position: above the table most of the time.
        rv = random.random()
        caption_above = rv > 0.2
        if caption_above:
            y, para_content, line_rects, char_rects = self.render_para(
                region['x'], int(region['y'] + region['blank'] / 2),
                num_lines, region['x'], region['w'], True)
            region['y'] += title_h

        row = random.randint(max(2, maxRow // 3), maxRow)
        col = random.randint(max(2, maxCol // 3), maxCol)

        # Kept as lists so tables with unequal row heights / column widths can
        # be generated later without changing the code below.
        rowHeight = [int(region['h'] / row) for _ in range(row)]
        colWidth = [int(region['w'] / col) for _ in range(col)]
        rowY = [region['y']]
        for cell_h in rowHeight:
            rowY.append(rowY[-1] + cell_h)
        colX = [region['x']]
        for cell_w in colWidth:
            colX.append(colX[-1] + cell_w)

        # Draw the grid lines, two pixels thick. Slice bounds are clamped at 0:
        # a negative start would be read by NumPy as an index from the end and
        # the border of a region starting at coordinate 0 would not be drawn.
        for X in colX:
            self.surface[rowY[0]:rowY[-1], max(X - 1, 0):max(X + 1, 0), :] = 0
        for Y in rowY:
            self.surface[max(Y - 1, 0):max(Y + 1, 0), colX[0]:colX[-1], :] = 0

        # Text start point of every cell, offset from the grid by the blank margin.
        coordSet = [
            [[colX[j] + region['blank'], rowY[i] + region['blank']] for j in range(col)]
            for i in range(row)
        ]

        # Number of text lines that fit into each cell.
        toff = TITLEOFF if region['attr'] == 'title' else 0
        lineNum = []
        for i in range(row):
            wordH = rowHeight[i] - 2 * region['blank']
            numL = int((wordH - self.size)
                       / (self.size + toff + region['lineSpacing'] + self.LINESOFF)) + 1
            lineNum.append([numL for _ in range(col)])

        if not caption_above:
            # Caption goes below the table.
            y = region['y'] + region['h'] + int(region['blank'] / 2)
            y, para_content, line_rects, char_rects = self.render_para(
                region['x'], y, num_lines, region['x'], region['w'], True)

        return coordSet, lineNum, colX[:-1], colWidth, [para_content, line_rects, char_rects]
    
    def seamlessClone(self, img, x, y, w, h):
        """
        Blend img into self.surface with OpenCV seamless cloning (MIXED_CLONE).

        img is first padded with a white border of up to 5 pixels per side; the
        padding is reduced near the surface edges. (x, y) is the top-left
        corner and (w, h) the size of the target area on the surface.
        Returns the blended image produced by cv2.seamlessClone; self.surface
        is not reassigned here.
        """
        max_pad = 5
        surface_h = self.surface.shape[0]
        surface_w = self.surface.shape[1]
        shape_msg = "seamlessClone>>>> self.surface.shape:{},{},{} "

        # Padding per side, limited by the room left between patch and surface edge.
        pad_top = min(max_pad, y)
        pad_bottom = max(min(max_pad, surface_h - y - h - 1), 0)
        pad_left = min(max_pad, x)
        pad_right = max(min(max_pad, surface_w - x - w - 1), 0)

        center = (int(x + w / 2), int(y + h / 2))
        print(shape_msg.format(self.surface.shape, img.shape, center))
        print("borders: {},{},{},{}".format(pad_top, pad_bottom, pad_left, pad_right))

        img = cv2.copyMakeBorder(
            img, pad_top, pad_bottom, pad_left, pad_right,
            cv2.BORDER_CONSTANT, value=(255, 255, 255))

        # Recompute the centre for the padded patch so the unpadded image
        # still lands with its top-left corner at (x, y).
        center = (
            int(x + img.shape[1] / 2 - pad_left),
            int(y + img.shape[0] / 2 - pad_top),
        )
        print(shape_msg.format(self.surface.shape, img.shape, center))

        # Full mask: every pixel of the padded patch takes part in the blend.
        src_mask = 255 * np.ones(img.shape, img.dtype)
        return cv2.seamlessClone(img, self.surface, src_mask, center, cv2.MIXED_CLONE)

    def lineRects2RegionRects(self, line_rects):
        """
        Merge line quadrilaterals into one axis-aligned bounding rectangle.

        line_rects: sequence of rows laid out as x0 y0 x1 y1 x2 y2 x3 y3.
            Values are converted to int by np.array(..., dtype=int).
        Returns a one-element list holding
        [x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max],
        or an empty list when line_rects contains no rectangle.
        """
        point_array = np.array(line_rects, dtype=int)  # x0 y0 x1 y1 x2 y2 x3 y3
        if point_array.size == 0:
            # No line was produced; min/max of an empty array would raise.
            return []

        xs = point_array[:, 0:8:2]  # columns 0, 2, 4, 6
        ys = point_array[:, 1:8:2]  # columns 1, 3, 5, 7
        x_min, x_max = xs.min(), xs.max()
        y_min, y_max = ys.min(), ys.max()

        region_rect = [[x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]]
        return region_rect

    def addBackground(self, x=0, y=0, line_num=0):
        """
        Put a randomly chosen background image behind the current region.

        A 'jpg' file is picked at random from ./content/regionbackground and
        resized to the background size.
        - Title or heading regions: the background covers only the text block
          (line_num[0] lines along the layout direction), is clipped to the
          surface and pasted at (x, y). line_num is indexed on this path, so a
          sequence must be passed; the default 0 is not usable here.
        - Other regions: the background has the region size and is blended
          with self.seamlessClone at the region origin; x, y and line_num are
          not used.
        """
        bg_dir = './content/regionbackground'
        candidates = [
            fname
            for _, _, fnames in os.walk(bg_dir)
            for fname in fnames
            if fname.endswith('jpg')
        ]
        chosen = random.choice(candidates)
        print("Add region background {}!!!".format(chosen))

        info = self.regionInfo
        title_like = info['attr'] == 'title' or self.isheading
        w = info['w']
        h = info['h']
        if title_like:
            line_spacing = info['lineSpacing'] + self.LINESOFF
            if info['dir'] == 'h':
                # Horizontal text: lines stack vertically.
                h = (line_num[0] - 1) * (self.size + line_spacing) + self.size
            else:
                # Vertical text: lines stack horizontally.
                w = (line_num[0] - 1) * (self.size + line_spacing) + self.size
            # Keep the pasted patch inside the surface.
            w = min(w, self.surface.shape[1] - x)
            h = min(h, self.surface.shape[0] - y)

        background = Image.open(os.path.join(bg_dir, chosen))
        background = background.resize((w, h)).convert("RGB")
        pixels = np.array(background)

        if title_like:
            self.surface[y:y + h, x:x + w] = pixels
        else:
            self.surface = self.seamlessClone(pixels, info['x'], info['y'], w, h)

    def render(self):
        rects = []
        if self.regionInfo['attr'] == 'text':
            if self.regionInfo['dir'] == 'h':
                region = self.regionInfo
                para_spacing = region['paraSpacing']+PARASOFF
                x = region['x'] + region['blank']
                y = region['y']
                
                para_line_num = None
                if self.isheading:
                    para_line_num = [1]
                elif region['paraSpacing'] == -1:
                    _, para_line_num = self.random_para()
                    x = random.randint(x+2*region['blank'], region['x'] + region['w']-3*region['blank'])
                    if y < self.config.picHeight // 2:
                        y += (region['h'] - self.size - region['blank']//2)
                    else:
                        y += region['blank'] // 2
                    para_line_num = [1]
                else:
                    _, para_line_num = self.random_para()
                    y += region['blank'] // 2
                    # 区域过小,不足以容纳三行,跳过该区域
                    if len(para_line_num) <= 1 and para_line_num[0]<3: 
                        return self.surface, rects

                # print("\033[32m********para_line_num************: {}\033[0m".format(para_line_num))

                # 添加区域背景
                rv = random.random()
                if rv < 0.02:
                    self.addBackground(region['x'], y, para_line_num)
                    self.thresh_color = self.color_diff[1] 

                for i, line_num in enumerate(para_line_num):
                    if i==0:
                        y, para_content, line_rects, char_rects = self.render_para(x, y, line_num, region['x'], region['w'], isFisrtPara=True)
                    else:
                        y, para_content, line_rects, char_rects = self.render_para(x, y, line_num, region['x'], region['w'])
                    y += para_spacing

                    if len(rects) == 0:
                        rects.append(para_content)
                        rects.append(line_rects)
                        rects.append(char_rects)
                        rects.append(self.lineRects2RegionRects(line_rects))
                    else:
                        rects[0].extend(para_content)
                        rects[1].extend(line_rects)
                        rects[2].extend(char_rects)
                        rects[3].extend(self.lineRects2RegionRects(line_rects))
                        
                o_y = self.regionInfo['y']
                self.regionInfo['y'] = y - para_spacing + 2*self.LINESOFF
                self.regionInfo['h'] = self.regionInfo['h'] - (self.regionInfo['y'] - o_y)
                return self.surface, rects
            else:
                region = self.regionInfo
                para_spacing = region['paraSpacing']+PARASOFF #+VPARASOFF
                readir = self.readir#random.randint(0,1)
                x = region['x']                   
                y = region['y'] + region['blank']
                _, para_line_num = self.random_para_v()
                # print("\033[32m********para_line_num************: {}\033[0m".format(para_line_num))
                if region['paraSpacing'] == -1:
                    readir = 0
                    y = random.randint(y+2*region['blank'], region['y'] + region['h']-4*region['blank'])
                    if x < self.config.picWidth // 2:
                        x += (region['w'] - self.size - region['blank'])
                    else:
                        pass
                        # x += region['blank']//2
                    para_line_num = [1]
                # 区域过小,不足以容纳三行,跳过
                else:
                    x = x + region['blank']//2
                    if len(para_line_num) <= 1 and para_line_num[0]<3: 
                        return self.surface, rects

                if readir:
                    x = region['x'] + region['w'] - region['blank']//2   
                # 添加区域背景
                rv = random.random()
                if rv < 0.02:
                    self.addBackground()
                    self.thresh_color = self.color_diff[1] 
                for line_num in para_line_num:
                    x, para_content, line_rects, char_rects = self.render_para_v(x, y, line_num, region['y'], region['h'], readir)
                    if readir:
                        x -= para_spacing
                    else:
                        x += para_spacing
                    if len(rects) == 0:
                        rects.append(para_content)
                        rects.append(line_rects)
                        rects.append(char_rects)
                        rects.append(self.lineRects2RegionRects(line_rects))
                    else:
                        rects[0].extend(para_content)
                        rects[1].extend(line_rects)
                        rects[2].extend(char_rects)
                        rects[3].extend(self.lineRects2RegionRects(line_rects))

                return self.surface, rects

        if self.regionInfo['attr'] == 'title':
            if self.regionInfo['dir'] == 'h':
                region = self.regionInfo
                para_spacing = region['paraSpacing']+PARASOFF
                _, para_line_num = self.random_para()
                x = region['x'] + region['blank']
                y = region['y'] + (region['h'] - self.size*para_line_num[0])//2 
                # print("para_line_num: ", para_line_num)

                # 添加区域背景
                rv = random.random()
                if rv < 0.2:
                    self.addBackground(region['x'], y, para_line_num)
                    self.thresh_color = self.color_diff[1]

                for line_num in para_line_num:
                    y, para_content, line_rects, char_rects = self.render_para(x, y, line_num, region['x'], region['w'])
                    y += para_spacing
                    if len(rects) == 0:
                        rects.append(para_content)
                        rects.append(line_rects)
                        rects.append(char_rects)
                        rects.append(self.lineRects2RegionRects(line_rects))
                    else:
                        rects[0].extend(para_content)
                        rects[1].extend(line_rects)
                        rects[2].extend(char_rects)
                        rects[3].extend(self.lineRects2RegionRects(line_rects))

                return self.surface, rects
            else:
                region = self.regionInfo
                para_spacing = region['paraSpacing']+PARASOFF #+VPARASOFF
                _, para_line_num = self.random_para_v()
                y = region['y'] + region['blank']
                x = region['x'] + (region['w'] - self.size*para_line_num[0])//2
                # print("para_line_num: ", para_line_num)

                # 添加区域背景
                rv = random.random()
                if rv < 0.2:
                    self.addBackground(x, region['y'], para_line_num)
                    self.thresh_color = self.color_diff[1]
                for line_num in para_line_num:
                    x, para_content, line_rects, char_rects = self.render_para_v(x, y, line_num, region['y'], region['h'])
                    x += para_spacing
                    if len(rects) == 0:
                        rects.append(para_content)
                        rects.append(line_rects)
                        rects.append(char_rects)
                        rects.append(self.lineRects2RegionRects(line_rects))
                    else:
                        rects[0].extend(para_content)
                        rects[1].extend(line_rects)
                        rects[2].extend(char_rects)
                        rects[3].extend(self.lineRects2RegionRects(line_rects))

                return self.surface, rects
        
        if self.regionInfo['attr'] == 'table':

            print("------------------------------table-----------------------------------")
            coordSet, lineNum, colX, colWidth, title_rects = self.table_draw()
            if coordSet == None:
                return self.surface, rects 
            for coords, lNum in zip(coordSet, lineNum): 
                for coord, line_num, leftX, W in zip(coords, lNum, colX, colWidth):
                    y, para_content, line_rects, char_rects = self.render_para(coord[0], coord[1], line_num, leftX, W)
                    # if len(rects) == 0: #TODO 文本框中添加表格文本坐标
                    #     rects.append(para_content)
                    #     rects.append(line_rects)
                    #     rects.append(char_rects)
                    #     rects.append(self.lineRects2RegionRects(line_rects))
                    # else:
                    #     rects[0].extend(para_content)
                    #     rects[1].extend(line_rects)
                    #     rects[2].extend(char_rects)
                    #     rects[3].extend(self.lineRects2RegionRects(line_rects))     
            
            if len(rects) == 0:
                rects.append(title_rects[0])
                rects.append(title_rects[1])
                rects.append(title_rects[2])
                rects.append(self.lineRects2RegionRects(title_rects[1]))                  
            else:
                rects[0].extend(title_rects[0])
                rects[1].extend(title_rects[1])
                rects[2].extend(title_rects[2])
                rects[3].extend(self.lineRects2RegionRects(title_rects[1]))       
                    
            return self.surface, rects

        if self.regionInfo['attr'] == 'tableimg':

            # 添加标题
            num_lines = random.randint(1, 3)
            title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
            remain_h = self.regionInfo['h'] - title_h
            if remain_h <= int(self.regionInfo['h'] // 2) and num_lines != 1:
                num_lines = 1
                title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
                remain_h = self.regionInfo['h'] - title_h                
            self.regionInfo['h'] = remain_h

            # 标题位置
            rv = random.random()
            if rv > 0.2: # 表格标题在表格上面
                y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], int(self.regionInfo['y'] + self.regionInfo['blank']/2), num_lines, self.regionInfo['x'], self.regionInfo['w'], True)
                self.regionInfo['y'] += title_h

            imgPath = './content/table'
            imageSet = []
            for _, _, filenames in os.walk(imgPath):
                for filename in filenames:
                    if filename.endswith('jpg') or filename.endswith('png'):
                        imageSet.append(filename)

            w = self.config.picWidth
            h = self.config.picHeight
            time = 0
            img = None
            ratio = 100
            thresh = 1.3
            while int(w//thresh)>=self.regionInfo['w'] or int(h//thresh)>=self.regionInfo['h'] or int(w*thresh)<=self.regionInfo['w'] or int(h*thresh)<=self.regionInfo['h'] or img==None:
                imgName = random.choice(imageSet)
                imageSet.remove(imgName)
                print("Figure path : {}".format(imgName))
                _img = Image.open(os.path.join(imgPath, imgName))
                _img = _img.convert("RGB")
                _w = _img.size[0]
                _h = _img.size[1]
                _ratio = abs(_w * 1.0 / self.regionInfo['w'] - 1) + abs(_h * 1.0 / self.regionInfo['h'] - 1)
                if _ratio < ratio:
                    img = _img
                    ratio = _ratio
                    w = _w
                    h = _h
                time+=1
                if time >= 20:
                    break
                print("-------------------------sample tableimg-------------------------")
            print("sample times {}-{},{} {},{}".format(time, w,h, self.regionInfo['w'], self.regionInfo['h']))
            imSize = (self.regionInfo['w'], self.regionInfo['h'])
            img = img.resize(imSize)
            matrix = np.array(img)
            sX, sY = self.regionInfo['x'], self.regionInfo['y']
            tW, tH = self.regionInfo['w'], self.regionInfo['h']
            if self.random_bg:
                self.surface = self.seamlessClone(matrix, sX, sY, tW, tH)
            else:
                self.surface[sY:sY+tH, sX:sX+tW] = matrix

            if rv <= 0.2: # 表格标题在表格下面
                y = self.regionInfo['y'] + self.regionInfo['h'] + int(self.regionInfo['blank'] / 2)
                y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], y, num_lines, self.regionInfo['x'], self.regionInfo['w'], True)

            if len(rects) == 0:
                rects.append(para_content)
                rects.append(line_rects)
                rects.append(char_rects)
                rects.append(self.lineRects2RegionRects(line_rects))
            else:
                rects[0].extend(para_content)
                rects[1].extend(line_rects)
                rects[2].extend(char_rects)
                rects[3].extend(self.lineRects2RegionRects(line_rects))

            return self.surface, rects


        if self.regionInfo['attr'] == 'image' :

            # 添加标题
            num_lines = random.randint(1, 3)
            title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
            remain_h = self.regionInfo['h'] - title_h
            if remain_h <= int(self.regionInfo['h'] // 2) and num_lines != 1:
                num_lines = 1
                title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
                remain_h = self.regionInfo['h'] - title_h                
            self.regionInfo['h'] = remain_h
            
            # # 标题位置
            # rv = random.random()
            # if rv > 0.8: # 表格标题在表格上面
            #     y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], self.regionInfo['y'], num_lines, self.regionInfo['x'], self.regionInfo['w'], True)
            #     self.regionInfo['y'] += title_h

            # 选择图像类型适应不同的训练需求
            imgPath = './content/image_word'
            gt_char = None
            gt_word = None
            if self.region_image_type == regionImageType[1]:
                imgPath = './content/synthtext'  
                gt_char = scio.loadmat(os.path.join(imgPath, 'char_info.mat'))
                gt_word = scio.loadmat(os.path.join(imgPath, 'word_info.mat'))
            elif self.region_image_type == regionImageType[2]:
                imgPath = './content/image_noword'  

            # 随机选取图像，嵌入到surface中
            imageSet = []
            for _, _, filenames in os.walk(imgPath):
                for filename in filenames:
                    if filename.endswith('jpg'):
                        imageSet.append(filename)

            w = self.config.picWidth
            h = self.config.picHeight
            time = 0
            img = None
            ratio = 100
            while int(w//1.5)>=self.regionInfo['w'] or int(h//1.5)>=self.regionInfo['h'] or int(w*1.5)<=self.regionInfo['w'] or int(h*1.5)<=self.regionInfo['h'] or img==None:
                imgName = random.choice(imageSet)
                imageSet.remove(imgName)
                print("Figure path : {}".format(imgName))
                _img = Image.open(os.path.join(imgPath, imgName))
                _img = _img.convert("L").convert("RGB")
                _w = _img.size[0]
                _h = _img.size[1]
                _ratio = abs(_w * 1.0 / self.regionInfo['w'] - 1) + abs(_h * 1.0 / self.regionInfo['h'] - 1)
                if _ratio < ratio:
                    img = _img
                    ori_size = _img.size
                    ratio = _ratio
                    w = _w
                    h = _h
                time+=1
                if time >= 20:
                    break
                print("-------------------------sample image-------------------------")
            print("sample times {}-{},{} {},{}".format(time, w,h, self.regionInfo['w'], self.regionInfo['h']))
            imSize = (self.regionInfo['w'], self.regionInfo['h'])
            img = img.resize(imSize)
            matrix = np.array(img)
            sX, sY = self.regionInfo['x'], self.regionInfo['y']
            tW, tH = self.regionInfo['w'], self.regionInfo['h']
            # self.surface = np.stack((self.surface,)*3, axis=-1)
            # print self.surface.shape
            # print(sY, sY+tH, sX, sX+tW)
            if self.random_bg:
                self.surface = self.seamlessClone(matrix, sX, sY, tW, tH)
            else:
                self.surface[sY:sY+tH, sX:sX+tW] = matrix
            

            # if rv <= 0.8: # 表格标题在表格下面
            y = self.regionInfo['y'] + self.regionInfo['h'] + int(self.regionInfo['blank'] / 2)
            y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], y, num_lines, self.regionInfo['x'], self.regionInfo['w'], True)

            # 插入的图像中的文本框信息
            if self.region_image_type == 'image_with_textGT':
                temp = gt_char[imgName][0]
                if len(np.array(gt_char[imgName][0][0]).shape) < 3:
                    temp = gt_char[imgName]
                gt_char = temp 
                gt_word = gt_word[imgName]
                assert (len(gt_char) == gt_word.shape[0])
                # print("------------------------", len(gt_char) )
                # print("------------------------", gt_word.shape)
                # if len(gt_char) != gt_word.shape[0]:
                #     print("!!!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!!!!!!!")
                #     try:
                #         print(temp)
                #         print(gt_char)
                #         print(gt_word)
                #     except Exception as e:
                #         print(e)
                #     input()
                word_rects = []
                line_rects = []
                char_rects = []
                for i in range(len(gt_char)):
                    char_box = gt_char[i] 

                    char_box[..., 0] *= imSize[0]/ori_size[0]
                    char_box[..., 1] *= imSize[1]/ori_size[1]
                    char_box[..., 0] += sX
                    char_box[..., 1] += sY

                    gt_word[i][..., 0] *= imSize[0]/ori_size[0]
                    gt_word[i][..., 1] *= imSize[1]/ori_size[1]
                    gt_word[i][..., 0] += sX
                    gt_word[i][..., 1] += sY

                    char_box = char_box.reshape(-1, 8).tolist()
                    word_box = gt_word[i].reshape(-1).tolist()
                    word_box.append(0)
                    char_rects.append([char_box])
                    word_rects.append([word_box])
                    line_rects.append(gt_word[i].reshape(-1).tolist())
                rects.append(word_rects)
                rects.append(line_rects)
                rects.append(char_rects) 

            if len(rects) == 0:
                rects.append(para_content)
                rects.append(line_rects)
                rects.append(char_rects)
                rects.append(self.lineRects2RegionRects(line_rects))
            else:
                rects[0].extend(para_content)
                rects[1].extend(line_rects)
                rects[2].extend(char_rects)
                rects[3].extend(self.lineRects2RegionRects(line_rects))

            return self.surface, rects

        if self.regionInfo['attr'] == 'imagegraph':
            # 添加标题
            num_lines = random.randint(1, 3)
            title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
            remain_h = self.regionInfo['h'] - title_h
            if remain_h <= int(self.regionInfo['h'] // 2) and num_lines != 1:
                num_lines = 1
                title_h = (self.size  + self.LINESOFF) * (num_lines - 1) + self.size * 2
                remain_h = self.regionInfo['h'] - title_h                
            self.regionInfo['h'] = remain_h

            # # 标题位置
            # rv = random.random()
            # if rv > 0.9: # 表格标题在表格上面
            #     y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], self.regionInfo['y'], num_lines, self.regionInfo['x'], self.regionInfo['w'], True)
            #     self.regionInfo['y'] += title_h

            imgPath = './content/graph'
            imageSet = []
            for _, _, filenames in os.walk(imgPath):
                for filename in filenames:
                    if filename.endswith('jpg') or filename.endswith('png'):
                        imageSet.append(filename)

            w = self.config.picWidth
            h = self.config.picHeight
            time = 0
            img = None
            ratio = 100
            while int(w//1.5)>=self.regionInfo['w'] or int(h//1.5)>=self.regionInfo['h'] or int(w*1.5)<=self.regionInfo['w'] or int(h*1.5)<=self.regionInfo['h'] or img==None:
                imgName = random.choice(imageSet)
                imageSet.remove(imgName)
                print("Figure path : {}".format(imgName))
                _img = Image.open(os.path.join(imgPath, imgName))
                _img = _img.convert("RGB")
                _w = _img.size[0]
                _h = _img.size[1]
                _ratio = abs(_w * 1.0 / self.regionInfo['w'] - 1) + abs(_h * 1.0 / self.regionInfo['h'] - 1)
                if _ratio < ratio:
                    img = _img
                    ratio = _ratio
                    w = _w
                    h = _h
                time+=1
                if time >= 20:
                    break
                print("-------------------------sample imagegraph-------------------------")
            print("sample times {}-{},{} {},{}".format(time, w,h, self.regionInfo['w'], self.regionInfo['h']))
            imSize = (self.regionInfo['w'], self.regionInfo['h'])
            img = img.resize(imSize)
            matrix = np.array(img)
            sX, sY = self.regionInfo['x'], self.regionInfo['y']
            tW, tH = self.regionInfo['w'], self.regionInfo['h']
            # self.surface = np.stack((self.surface,)*3, axis=-1)
            # print self.surface.shape
            '''
            for ii in range(tH):
                for jj in range(tW):
                    if matrix[ii][jj][0] <= 100 or matrix[ii][jj][1] <= 100 or matrix[ii][jj][2] <= 100:
                        self.surface[ii+sY][jj+sX][0:3] = matrix[ii][jj] 
            '''
            if self.random_bg:
                self.surface = self.seamlessClone(matrix, sX, sY, tW, tH)
            else:
                self.surface[sY:sY+tH, sX:sX+tW] = matrix

            # if rv <= 0.8: # 表格标题在表格下面
            y = self.regionInfo['y'] + self.regionInfo['h'] + int(self.regionInfo['blank'] / 2)
            y, para_content, line_rects, char_rects = self.render_para(self.regionInfo['x'], y, num_lines, self.regionInfo['x'], self.regionInfo['w'], True)

            if len(rects) == 0:
                rects.append(para_content)
                rects.append(line_rects)
                rects.append(char_rects)
                rects.append(self.lineRects2RegionRects(line_rects))
            else:
                rects[0].extend(para_content)
                rects[1].extend(line_rects)
                rects[2].extend(char_rects)
                rects[3].extend(self.lineRects2RegionRects(line_rects))

            return self.surface, rects

        if self.regionInfo['attr'] == 'formula':
            imgPath = './content/formula'
            imageSet = []
            for _, _, filenames in os.walk(imgPath):
                for filename in filenames:
                    if filename.endswith('jpg'):
                        imageSet.append(filename)

            self.regionInfo['w'] = self.regionInfo['w'] - self.regionInfo['blank']*3
            self.regionInfo['x'] = self.regionInfo['x'] + int(self.regionInfo['blank']*1.5)
            w = self.config.picWidth
            h = self.config.picHeight
            time = 0
            img = None
            ratio = 100
            while int(w//1.5)>=self.regionInfo['w'] or int(h//1.5)>=self.regionInfo['h'] or int(w*1.5)<=self.regionInfo['w'] or int(h*1.5)<=self.regionInfo['h'] or img==None:
                imgName = random.choice(imageSet)
                imageSet.remove(imgName)
                print("Figure path : {}".format(imgName))
                _img = Image.open(os.path.join(imgPath, imgName))
                _img = _img.convert("RGB")
                _w = _img.size[0]
                _h = _img.size[1]
                _ratio = abs(_w * 1.0 / self.regionInfo['w'] - 1) + abs(_h * 1.0 / self.regionInfo['h'] - 1)
                if _ratio < ratio:
                    img = _img
                    ratio = _ratio
                    w = _w
                    h = _h
                time+=1
                if time >= 20:
                    break
                print("-------------------------sample formula-------------------------")
            print("sample times {}-{},{} {},{}".format(time, w,h, self.regionInfo['w'], self.regionInfo['h']))
            imSize = (self.regionInfo['w'], self.regionInfo['h'])
            img = img.resize(imSize)
            matrix = np.array(img)
            sX, sY = self.regionInfo['x'], self.regionInfo['y']
            tW, tH = self.regionInfo['w'], self.regionInfo['h']
            # self.surface = np.stack((self.surface,)*3, axis=-1)
            # print self.surface.shape
            '''
            for ii in range(tH):
                for jj in range(tW):
                    if matrix[ii][jj][0] <= 100 or matrix[ii][jj][1] <= 100 or matrix[ii][jj][2] <= 100:
                        self.surface[ii+sY][jj+sX][0:3] = matrix[ii][jj] 
            '''
            if self.random_bg:
                self.surface = self.seamlessClone(matrix, sX, sY, tW, tH)
            else:
                self.surface[sY:sY+tH, sX:sX+tW] = matrix

            return self.surface, rects
        
        sys.stderr.write('Unknown attr!\n')
        raise SystemExit(1)
        return None
        # im = Image.fromarray(self.surface)
        # im.save('test.jpg')
                

class TextSource(object):
    """
    从文本库中选取文本
    """
    def __init__(self, min_nchar, fn):
        self.min_nchar = min_nchar
        with open(fn,'r', encoding='utf-8') as f:
            self.txt = [l.strip() for l in f.readlines()]
            # print(self.txt)

        self.p_para_nline = [1.0,1.0]#[1.7,3.0] # beta: (a, b), max_nline
        self.p_para_nword = [1.7,3.0,10] # beta: (a,b), max_nword
        self.center_para = 0.5
    
    def check_symb_frac(self, txt, f=0.35):
        """
        T/F return : T iff fraction of symbol/special-charcters in
                     txt is less than or equal to f (default=0.25).
        """
        return np.sum([not ch.isalnum() for ch in txt])/(len(txt)+0.0) <= f
    
    def center_align(self, lines):
        """
        PADS lines with space to center align them
        lines : list of text-lines.
        """
        ls = [len(l) for l in lines]
        max_l = max(ls)
        for i in range(len(lines)):
            l = lines[i].strip()
            dl = max_l-ls[i]
            lspace = dl//2
            rspace = dl-lspace
            lines[i] = ' '*lspace+l+' '*rspace
        return lines

    def is_good(self, txt, f=0.35):
        """
        T/F return : T iff the lines in txt (a list of txt lines)
                     are "valid".
                     A given line l is valid iff:
                         1. It is not empty.
                         2. symbol_fraction > f
                         3. Has at-least self.min_nchar characters
                         4. Not all characters are i,x,0,O,-
        """
        # return False
        def is_txt(l):
            char_ex = ['i','I','o','O','0','-']
            chs = [ch in char_ex for ch in l]
            return not np.all(chs)

        return [ (len(l)> self.min_nchar
                 and self.check_symb_frac(l,f)
                 and is_txt(l)) for l in txt ]
    
    def get_lines(self, nline, nword, nchar_max, f=0.35, niter=100):
        def h_lines(niter=100):
            lines = ['']
            iter = 0
            while not np.all(self.is_good(lines,f)) and iter < niter:
                iter += 1
                # print(len(self.txt), nline)
                try:
                    line_start = np.random.choice(len(self.txt)-nline)
                except:
                    continue
                lines = [self.txt[line_start+i] for i in range(nline)]
            return lines

        lines = ['']
        iter = 0
        while not np.all(self.is_good(lines,f)) and iter < niter:
            iter += 1
            lines = h_lines(niter=100)
            # get words per line:
            nline = len(lines)
            for i in range(nline):
                words = lines[i].split()
                dw = len(words)-nword[i]
                if dw > 0:
                    first_word_index = random.choice(range(dw+1))
                    lines[i] = ' '.join(words[first_word_index:first_word_index+nword[i]])

                while len(lines[i]) > nchar_max: #chop-off characters from end:
                    if not np.any([ch.isspace() for ch in lines[i]]):
                        lines[i] = ''
                    else:
                        lines[i] = lines[i][:len(lines[i])-lines[i][::-1].find(' ')].strip()
        
        if not np.all(self.is_good(lines,f)):
            return #None
        else:
            return lines
    
    def get_para(self, nline_min, nchar_min, lan='en', niter=100):
        lines = []
        chars_num = 0       
        iter = 0
        while iter < niter and chars_num < nchar_min:
            sam_l = min(nline_min, len(self.txt))
            line_start = np.random.choice(len(self.txt)-sam_l)
            for i in range(sam_l): 
                lines.append(self.txt[line_start+i]) 
                chars_num += len(self.txt[line_start+i])
                
        for i,line in enumerate(lines):
            lines[i] = lines[i].strip()

        if lan == 'en':
            return ' '.join(lines)
        elif lan == 'cn' or lan == 'jp':
            return ''.join(lines)

    def sample_para(self,nline_max,nchar_max):
        # get number of lines in the paragraph:
        nline = nline_max*sstat.beta.rvs(a=self.p_para_nline[0], b=self.p_para_nline[1])
        nline = max(1, int(np.ceil(nline)))
        # print("nline:", nline)

        # get number of words:
        nword = [self.p_para_nword[2]*sstat.beta.rvs(a=self.p_para_nword[0], b=self.p_para_nword[1])
                 for _ in range(nline)]
        # print("nword:", nword)
        nword = [max(1,int(np.ceil(n))) for n in nword]
        # print("nword:", nword)

        # print("nchar_max", nchar_max)
        # lines = ['aaa', 'bbb', 'ccc', 'ddd', 'eeed'] # 
        lines = self.get_lines(nline, nword, nchar_max, f=0.35)
        if lines is not None:
            # center align the paragraph-text:
            if np.random.rand() < self.center_para:
                lines = self.center_align(lines)
            for i,line in enumerate(lines):
                lines[i] = lines[i].strip()
            # return '\n'.join(lines)
            return ' '.join(lines)
        else:
            return ''